Packages

#Load all packages
library(plotrix)
library(arulesViz)
#devtools::install_github("rsquaredacademy/olsrr")
library(reshape2) 
library(ggcorrplot)
library(spatialreg)
library(rgdal)
library(spgwr)
library(car)
library(caret)
library(spdep)
library(tidyr)
library(olsrr)
library(car)
library(ggplot2)
library(corrplot)
library(janitor)
library(maptools)
library(RColorBrewer)
library(classInt)
library(OpenStreetMap)
library(sp)
library(rgeos)
library(tmap)
library(tmaptools)
library(sf)
library(rgdal)
library(geojsonio)
library(openxlsx)
library(plotly)
library(tidyverse)
library(dplyr)
library(assertive)
library(plyr)
library(naniar)

SECTION 1: Data management

Read in national child measurement data and convert to a data frame.

######Read in national child measurement data and convert to a data frame.
obesitydata<-read.csv("data/yr6_childmeasurement201819.csv")
obesitydata<-data.frame(obesitydata)
childdata<-obesitydata[-grep("^E1",obesitydata[,2]),]
childdata<- childdata %>% dplyr::filter(!(ONS.Code==""))

Rename LA variable and more data cleasning.

#Rename LA variable
names(childdata)[1] <- c("LA")
childdata<- childdata %>% dplyr::filter(!(LA=="ENGLAND1"))
class(childdata)
## [1] "data.frame"
#check dataset
names(childdata)
##  [1] "LA"                       "ONS.Code"                
##  [3] "Underweight_number"       "Underweight_percent"     
##  [5] "Lower.CI"                 "Upper..CI"               
##  [7] "Healthyweight_number"     "Healthyweight_percent"   
##  [9] "Lower.CI.1"               "Upper..CI.1"             
## [11] "Overweight_number"        "Overweight_percent"      
## [13] "Lower.CI.2"               "Upper..CI.2"             
## [15] "Obese_number"             "Obese_percent"           
## [17] "Lower.CI.3"               "Upper..CI.3"             
## [19] "Severely.obese_number"    "Severely.obese_percent"  
## [21] "Lower.CI.4"               "Upper..CI.4"             
## [23] "overandobese_number"      "overandobese_percent"    
## [25] "Lower.CI.5"               "Upper..CI.5"             
## [27] "Total.number.of.children"
summary(childdata)
##             LA           ONS.Code   Underweight_number  Underweight_percent
##  Adur        :  1   E06000001:  1   12     : 26        x          : 22     
##  Allerdale   :  1   E06000002:  1   x      : 22        1.009251472:  2     
##  Amber Valley:  1   E06000003:  1   20     : 15        1.652892562:  2     
##  Arun        :  1   E06000004:  1   11     : 14        0.468854655:  1     
##  Ashfield    :  1   E06000005:  1   16     : 14        0.516647532:  1     
##  Ashford     :  1   E06000006:  1   9      : 14        0.606673407:  1     
##  (Other)     :318   (Other)  :318   (Other):219        (Other)    :295     
##         Lower.CI         Upper..CI   Healthyweight_number Healthyweight_percent
##  x          : 22   x          : 22   x      : 22          x          : 22      
##  0.578266703:  2   1.755778497:  2   565    :  3          53.64311246:  1      
##  0.227296963:  1   0.964643488:  1   633    :  3          54.44978954:  1      
##  0.27204815 :  1   0.979008108:  1   661    :  3          54.85436893:  1      
##  0.278331702:  1   1.103631632:  1   1036   :  2          55.19930676:  1      
##  0.293794974:  1   1.134294839:  1   1063   :  2          55.29437136:  1      
##  (Other)    :296   (Other)    :296   (Other):289          (Other)    :297      
##        Lower.CI.1       Upper..CI.1  Overweight_number Overweight_percent
##  x          : 22   x          : 22   Min.   :  34.0    Min.   : 8.744    
##  52.0194643 :  1   55.2590729 :  1   1st Qu.: 139.0    1st Qu.:13.144    
##  52.3176109 :  1   56.13619347:  1   Median : 186.5    Median :13.972    
##  52.75311863:  1   56.26684873:  1   Mean   : 260.4    Mean   :13.876    
##  53.43402389:  1   56.72044107:  1   3rd Qu.: 325.0    3rd Qu.:14.673    
##  53.75488029:  1   57.16131779:  1   Max.   :2268.0    Max.   :18.421    
##  (Other)    :297   (Other)    :297                                       
##    Lower.CI.2      Upper..CI.2     Obese_number    Obese_percent   
##  Min.   : 7.143   Min.   :10.66   Min.   :  37.0   Min.   : 9.528  
##  1st Qu.:11.327   1st Qu.:15.04   1st Qu.: 171.5   1st Qu.:15.622  
##  Median :12.277   Median :15.72   Median : 237.0   Median :18.866  
##  Mean   :12.169   Mean   :15.80   Mean   : 373.9   Mean   :18.846  
##  3rd Qu.:13.150   3rd Qu.:16.64   3rd Qu.: 470.0   3rd Qu.:21.661  
##  Max.   :15.541   Max.   :21.70   Max.   :3980.0   Max.   :29.557  
##                                                                    
##    Lower.CI.3      Upper..CI.3    Severely.obese_number Severely.obese_percent
##  Min.   : 7.946   Min.   :11.39   35.00  :  9           2.93   :  4           
##  1st Qu.:13.542   1st Qu.:17.82   20.00  :  8           1.50   :  3           
##  Median :16.755   Median :21.00   34.00  :  8           2.22   :  3           
##  Mean   :16.921   Mean   :20.96   23.00  :  7           2.57   :  3           
##  3rd Qu.:19.994   3rd Qu.:23.82   60.00  :  7           2.97   :  3           
##  Max.   :28.097   Max.   :31.06   17.00  :  6           3.09   :  3           
##                                   (Other):279           (Other):305           
##    Lower.CI.4   Upper..CI.4  overandobese_number overandobese_percent
##  2.13   :  4   4.44   :  5   Min.   :  71.0      Min.   :19.06       
##  2.21   :  4   5.73   :  4   1st Qu.: 319.5      1st Qu.:29.48       
##  0.83   :  3   3.28   :  3   Median : 419.5      Median :32.84       
##  1.28   :  3   4.04   :  3   Mean   : 634.2      Mean   :32.72       
##  1.29   :  3   5.24   :  3   3rd Qu.: 807.2      3rd Qu.:36.15       
##  1.92   :  3   5.45   :  3   Max.   :6248.0      Max.   :44.93       
##  (Other):304   (Other):303                                           
##    Lower.CI.5     Upper..CI.5    Total.number.of.children
##  Min.   :16.87   Min.   :21.46   Min.   :  226           
##  1st Qu.:26.88   1st Qu.:32.15   1st Qu.: 1001           
##  Median :30.32   Median :35.59   Median : 1343           
##  Mean   :30.33   Mean   :35.22   Mean   : 1850           
##  3rd Qu.:34.06   3rd Qu.:38.78   3rd Qu.: 2333           
##  Max.   :43.32   Max.   :46.64   Max.   :15478           
## 
#Rename ons code variable
names(childdata)[2] <- c("2011GSS_CODE")



#Keep obesity related variables to reduce size of file.
childdata<- childdata[,c( "LA","2011GSS_CODE","Obese_percent","overandobese_percent", "Severely.obese_percent", "Total.number.of.children")]    
#Change variables into numeric variables
childdata$Obese_percent_num <- as.numeric(childdata$"Obese_percent")
str(childdata)
## 'data.frame':    324 obs. of  7 variables:
##  $ LA                      : Factor w/ 362 levels "","Adur","Allerdale",..: 69 76 116 136 187 193 206 213 231 276 ...
##  $ 2011GSS_CODE            : Factor w/ 362 levels "","E06000001",..: 48 6 293 2 3 277 278 56 4 279 ...
##  $ Obese_percent           : num  22.4 22.5 24.2 26.9 24.7 ...
##  $ overandobese_percent    : num  37.7 37.6 37.8 43.8 39.7 ...
##  $ Severely.obese_percent  : Factor w/ 248 levels "","1.16","1.18",..: 200 152 188 234 229 235 192 98 163 227 ...
##  $ Total.number.of.children: num  5664 1198 1912 1154 1821 ...
##  $ Obese_percent_num       : num  22.4 22.5 24.2 26.9 24.7 ...
childdata$Severely.obese_percent<- revalue(childdata$"Severely.obese_percent", c("x"="0"))
childdata$Severely.obese_percentnum <-as.numeric(paste((childdata$"Severely.obese_percent")))
#Create obesity rate variable.
childdata$obesity_rate<- childdata$Obese_percent_num +childdata$Severely.obese_percentnum
childdata<- childdata[,c( "LA","2011GSS_CODE","Obese_percent","overandobese_percent", "obesity_rate", "Total.number.of.children")]   


#NCMP data uses 2011 LA ecode, LA code changed for 2019, need to add in new code to allow IMD variables to be fully merged.
childdata$GSS_CODE<- (childdata$"2011GSS_CODE")
#New 2019 code: E06000058 - Bournemouth, Christchurch and Poole - new unitary authority instead of:
#Old code:E06000028 Bournemouth,E06000029   Poole,E07000048 Christchurch
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E06000028"="E06000058","E06000029"="E06000058","E07000048"="E06000058"))

#New 2019 code:E06000059 - Dorset - new unitary authority - (Dorset county abolished)
#Old code:E07000049 East Dorset,E07000050   North Dorset,E07000051  Purbeck,E07000052   West Dorset,E07000053   Weymouth and Portland
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000049"="E06000059","E07000050"="E06000059","E07000051"="E06000059",
                                                     "E07000052"="E06000059","E07000053"="E06000059"))


#New 2019 code:E07000244 - East Suffolk - new local authority district (Suffolk Coastal and Waveney districts abolished)
#Old code:E07000205 Suffolk Coastal,E07000206   Waveney
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000205"="E07000244","E07000206"="E07000244"))


#New 2019 code:E07000245 - West Suffolk - new local authority district (Forest Heath and St Edmundsbury districts abolished)
#Old code:E07000201 Forest Heath,E07000204  St. Edmundsbury
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000201"="E07000245","E07000204"="E07000245"))

#New 2019 code:E07000246 - Somerset West and Taunton - new local authority district (Taunton Deane and West Somerset districts abolished)
#Old code:E07000190 Taunton Deane,E07000191 West Somerset
childdata$GSS_CODE<- revalue(childdata$"GSS_CODE", c("E07000190"="E07000246","E07000191"="E07000246"))

#####Fast food shop density file
fastfood<-read.csv("data/fastfood_outlet.csv")
fastfood<-data.frame(fastfood)
is.na(fastfood) <- fastfood == "*"
fastfood$fast_food_density<-as.numeric(paste((fastfood$"Rate.per.100.000.population")))
#La code for this data is for 2018, change some of the codes to 2011 La code to allow full merge into main data.
#change E06000048   Northumberland to   E06000057
#change E07000097   East Hertfordshire  to  E07000242
#change E07000100   St Albans   to  E07000240
#change E07000101   Stevenage   to  E07000243
#change E07000104   Welwyn Hatfield to  E07000241
#change E08000020   Gateshead   to  E08000037
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E06000048"="E06000057"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000097"="E07000242"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000100"="E07000240"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000101"="E07000243"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E07000104"="E07000241"))
fastfood$LA.code<- revalue(fastfood$"LA.code", c("E08000020"="E08000037"))
names(fastfood)[2] <- c("2011GSS_CODE")
#Merge into main data.
childdata <- merge(x = childdata, y = fastfood, by = "2011GSS_CODE", all.x = TRUE)

#####Ethnicity data uses the 2011 ecode as well, merge in ethnicity data first.
ethnicity<-read.csv("data/census_ethnicity.csv")
ethnicity<-data.frame(ethnicity)
names(ethnicity)[3] <- c("2011GSS_CODE")
#Only keep variables of interest to reduce size of data.
ethnicity_por<- ethnicity[,c("2011GSS_CODE","Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value",
                             "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..White..Total..measures..Value", 
                             "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Mixed.multiple.ethnic.group..Total..measures..Value",
                             "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Asian.Asian.British..Total..measures..Value",
                             "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Black.African.Caribbean.Black.British..Total..measures..Value",
                             "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Other.ethnic.group..Total..measures..Value")]


#Merge with main dataset
childdata2 <- merge(x = childdata, y = ethnicity_por, by = "2011GSS_CODE", all.x = TRUE)


#Summarise the ethnicity and obesity data for the unitary local authorites that were combined in 2019.
childdata2 <-data.frame(aggregate(. ~ GSS_CODE,childdata2, sum))

#Create percentage variables for each ethnic group.
childdata2$percent_white<- childdata2[[ "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..White..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]
childdata2$percent_mix<- childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Mixed.multiple.ethnic.group..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]
childdata2$percent_asian<- childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Asian.Asian.British..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]
childdata2$percent_african<- childdata2[[ "Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..Black.African.Caribbean.Black.British..Total..measures..Value"]]/childdata2[["Sex..All.persons..Age..Age.0.to.24..Ethnic.Group..All.categories..Ethnic.group..measures..Value"]]




#New 2019 code: E06000058 - Bournemouth, Christchurch and Poole - new unitary authority instead of:
#Old code:E06000028 Bournemouth,E06000029   Poole,E07000048 Christchurch

childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E06000058',(childdata2$obesity_rate)/3,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E06000058',(childdata2$fast_food_density)/3,fast_food_density))

#New 2019 code:E06000059 - Dorset - new unitary authority - (Dorset county abolished)
#Old code:E07000049 East Dorset,E07000050   North Dorset,E07000051  Purbeck,E07000052   West Dorset,E07000053   Weymouth and Portland

childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E06000059',(childdata2$obesity_rate)/5,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E06000059',(childdata2$fast_food_density)/5,fast_food_density))
#New 2019 code:E07000244 - East Suffolk - new local authority district (Suffolk Coastal and Waveney districts abolished)
#Old code:E07000205 Suffolk Coastal,E07000206   Waveney
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E07000244',(childdata2$obesity_rate)/2,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E07000244',(childdata2$fast_food_density)/2,fast_food_density))
#New 2019 code:E07000245 - West Suffolk - new local authority district (Forest Heath and St Edmundsbury districts abolished)
#Old code:E07000201 Forest Heath,E07000204  St. Edmundsbury
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E07000245',(childdata2$obesity_rate)/2,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E07000245',(childdata2$fast_food_density)/2,fast_food_density))

#New 2019 code:E07000246 - Somerset West and Taunton - new local authority district (Taunton Deane and West Somerset districts abolished)
#Old code:E07000190 Taunton Deane,E07000191 West Somerset
childdata2$obesity_rate<- with(childdata2, ifelse(GSS_CODE=='E07000246',(childdata2$obesity_rate)/2,obesity_rate))
childdata2$fast_food_density<- with(childdata2, ifelse(GSS_CODE=='E07000246',(childdata2$fast_food_density)/2,fast_food_density))

######Read in multiple excel tabs for the imd data.

IMD_income <- read.xlsx('data/IMD2019.xlsx',sheet=3)
IMD_employ <- read.xlsx('data/IMD2019.xlsx',sheet=4)
IMD_edu <- read.xlsx('data/IMD2019.xlsx',sheet=5)
IMD_health <- read.xlsx('data/IMD2019.xlsx',sheet=6)
IMD_crime <- read.xlsx('data/IMD2019.xlsx',sheet=7)
IMD_barriers <- read.xlsx('data/IMD2019.xlsx',sheet=8)
IMD_living <- read.xlsx('data/IMD2019.xlsx',sheet=9)
IMD_idaci <- read.xlsx('data/IMD2019.xlsx',sheet=10)
#Put data into data frames.

IMD_income <- data.frame(IMD_income)
IMD_employ <- data.frame(IMD_employ)
IMD_edu <- data.frame(IMD_edu)
IMD_health <- data.frame(IMD_health)
IMD_crime <- data.frame(IMD_crime)
IMD_barriers <- data.frame(IMD_barriers)
IMD_living <- data.frame(IMD_living)
IMD_idaci <- data.frame(IMD_idaci)

#Rename ons code variable
names(childdata)[2] <- c("GSS_CODE")
names(IMD_income)[1] <- c("GSS_CODE")
names(IMD_employ)[1] <- c("GSS_CODE")
names(IMD_edu)[1] <- c("GSS_CODE")
names(IMD_health)[1] <- c("GSS_CODE")
names(IMD_crime)[1] <- c("GSS_CODE")
names(IMD_barriers)[1] <- c("GSS_CODE")
names(IMD_living)[1] <- c("GSS_CODE")
names(IMD_idaci)[1] <- c("GSS_CODE")
#Remove duplicate variables in these files.
IMD_income <-within(IMD_income, rm("Local.Authority.District.name..2019."))
IMD_employ <-within(IMD_employ , rm("Local.Authority.District.name..2019."))
IMD_edu <-within(IMD_edu, rm("Local.Authority.District.name..2019."))
IMD_health <-within(IMD_health, rm("Local.Authority.District.name..2019."))
IMD_crime <-within(IMD_crime, rm("Local.Authority.District.name..2019."))
IMD_barriers <-within(IMD_barriers, rm("Local.Authority.District.name..2019."))
IMD_living <-within(IMD_living, rm("Local.Authority.District.name..2019."))
IMD_idaci <-within(IMD_idaci, rm("Local.Authority.District.name..2019."))




#Merge the datasets
childdata2 <- merge(x = childdata2, y = IMD_income, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_employ, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_edu, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_health, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_crime, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_barriers, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_living, by = "GSS_CODE", all.x = TRUE)
childdata2 <- merge(x = childdata2, y = IMD_idaci, by = "GSS_CODE", all.x = TRUE)



#Physical activity data
activity <- read.xlsx('data/childactivity.xlsx',sheet=1)
activity<-data.frame(activity)
exercise<- activity[,c("La","GSS_CODE","X60minplus_Rate....","fairly_active_Rate....","Less_active_Rate....")]
is.na(exercise) <- exercise == "^"
is.na(exercise) <- exercise == "*"
exercise$active_rate <-as.numeric(paste((exercise$"X60minplus_Rate....")))
exercise$fairly_active_rate <-as.numeric(paste((exercise$"fairly_active_Rate....")))
exercise$less_active_rate <-as.numeric(paste((exercise$"Less_active_Rate....")))
exercise<- exercise[,c("La","GSS_CODE","active_rate", "fairly_active_rate","less_active_rate")]
#Merge with main dataset
childdata2 <- merge(x = childdata2, y =exercise, by = "GSS_CODE", all.x = TRUE)

#Keep only relavent IMD variables. We will use the score variables.
childdata2<- childdata2[,c("GSS_CODE","obesity_rate","fast_food_density","active_rate",
                           "percent_white",                                                                                  
                           "percent_mix",                                                                                   
                           "percent_asian",                                                                                  
                           "percent_african",
                           "Income...Average.rank",
                           "Employment...Average.rank",
                           "Education..Skills.and.Training...Average.rank",
                           "Health.Deprivation.and.Disability...Average.rank",
                           "Crime...Average.rank",
                           "Barriers.to.Housing.and.Services...Average.rank",
                           "Living.Environment...Average.rank",
                           "IDACI...Average.rank",
                           "Income...Average.score",
                           "Employment...Average.score",
                           "Education..Skills.and.Training...Average.score",
                           "Health.Deprivation.and.Disability...Average.score",
                           "Crime...Average.score",
                           "Barriers.to.Housing.and.Services...Average.score",
                           "Living.Environment...Average.score",
                           "IDACI...Average.score",
                           "fairly_active_rate","less_active_rate" )]
names(childdata2)
##  [1] "GSS_CODE"                                         
##  [2] "obesity_rate"                                     
##  [3] "fast_food_density"                                
##  [4] "active_rate"                                      
##  [5] "percent_white"                                    
##  [6] "percent_mix"                                      
##  [7] "percent_asian"                                    
##  [8] "percent_african"                                  
##  [9] "Income...Average.rank"                            
## [10] "Employment...Average.rank"                        
## [11] "Education..Skills.and.Training...Average.rank"    
## [12] "Health.Deprivation.and.Disability...Average.rank" 
## [13] "Crime...Average.rank"                             
## [14] "Barriers.to.Housing.and.Services...Average.rank"  
## [15] "Living.Environment...Average.rank"                
## [16] "IDACI...Average.rank"                             
## [17] "Income...Average.score"                           
## [18] "Employment...Average.score"                       
## [19] "Education..Skills.and.Training...Average.score"   
## [20] "Health.Deprivation.and.Disability...Average.score"
## [21] "Crime...Average.score"                            
## [22] "Barriers.to.Housing.and.Services...Average.score" 
## [23] "Living.Environment...Average.score"               
## [24] "IDACI...Average.score"                            
## [25] "fairly_active_rate"                               
## [26] "less_active_rate"
#Rename variables with clearer names.


names(childdata2)[3] <- "Density of fast food outlets"
names(childdata2)[4] <- "Active rate"
names(childdata2)[5] <- "Percentage of White children"
names(childdata2)[6] <- "Percentage of Mixed children"
names(childdata2)[7] <- "Percentage of Asian children"
names(childdata2)[8] <- "Percentage of African children"
names(childdata2)[17] <- "IMD income average score"
names(childdata2)[18] <- "IMD employment average score"
names(childdata2)[19] <- "IMD education average score"
names(childdata2)[20] <- "IMD health average score"
names(childdata2)[21] <- "IMD crime average score"
names(childdata2)[22] <- "IMD housing average score"
names(childdata2)[23] <- "IMD living average score"
names(childdata2)[24] <- "IMD IDACI average score"
names(childdata2)[25] <- "Fairly active rate"
names(childdata2)[26] <- "Less active rate"

SECTION 2: Regression analysis

Check dependent variable is normal.Appendix Figure A1

#Check dependent variable is normal.
boxplot(childdata2$obesity_rate,main="Boxplot of obesity rate")

Check transformations of dependent variable and see which one will be normal. Appendix Figure A1

#Check transformations of dependent variable and see which one will be normal.
symbox(~`obesity_rate`, childdata2, na.rm=T, powers=seq(-3,3,by=.5),main="Types of transformation",ylab="Obesity rate", xlab="Power of transformation")

#Based on this, use square root instead.
childdata2$root_ob<- sqrt(childdata2$obesity_rate)
boxplot(childdata2$root_ob,main="Boxplot of transformed obesity rate")

Join main dataset and map data

Appendix Table 1: Univariate regression

#Variable selection.
#Univariate regression first.

model_1 <- lm(root_ob ~ `Density of fast food outlets`, data = modeldata)#significant
summary(model_1)
## 
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.27677 -0.29938 -0.00988  0.29868  1.45546 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    3.6948563  0.0842665   43.85   <2e-16 ***
## `Density of fast food outlets` 0.0127375  0.0009756   13.06   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4625 on 313 degrees of freedom
## Multiple R-squared:  0.3526, Adjusted R-squared:  0.3505 
## F-statistic: 170.5 on 1 and 313 DF,  p-value: < 2.2e-16
model_2 <- lm(root_ob ~ `Active rate`, data = modeldata)#significant
summary(model_2)
## 
## Call:
## lm(formula = root_ob ~ `Active rate`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.5186 -0.3957 -0.0229  0.4089  1.4139 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5.8446     0.2481  23.558  < 2e-16 ***
## `Active rate`  -2.3662     0.5207  -4.544 8.38e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5489 on 266 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.07204,    Adjusted R-squared:  0.06855 
## F-statistic: 20.65 on 1 and 266 DF,  p-value: 8.378e-06
model_3 <- lm(root_ob ~ `Percentage of White children`, data = modeldata)#significant
summary(model_3)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of White children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33103 -0.35594  0.01714  0.34855  1.23013 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      6.1290     0.1569  39.073   <2e-16 ***
## `Percentage of White children`  -1.6286     0.1809  -9.001   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5123 on 313 degrees of freedom
## Multiple R-squared:  0.2056, Adjusted R-squared:  0.2031 
## F-statistic: 81.02 on 1 and 313 DF,  p-value: < 2.2e-16
model_4 <- lm(root_ob ~ `Percentage of Mixed children`, data = modeldata)#significant
summary(model_4)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.42548 -0.41214  0.01901  0.44044  1.19231 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.51037    0.05736  78.630  < 2e-16 ***
## `Percentage of Mixed children`  5.98789    1.24793   4.798 2.48e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5547 on 313 degrees of freedom
## Multiple R-squared:  0.06852,    Adjusted R-squared:  0.06554 
## F-statistic: 23.02 on 1 and 313 DF,  p-value: 2.481e-06
model_5 <- lm(root_ob ~ `Percentage of Asian children`, data = modeldata)#significant
summary(model_5)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Asian children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.35098 -0.39116  0.00483  0.37521  1.20279 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     4.56331    0.03763 121.253  < 2e-16 ***
## `Percentage of Asian children`  2.50436    0.32550   7.694 1.87e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5271 on 313 degrees of freedom
## Multiple R-squared:  0.159,  Adjusted R-squared:  0.1564 
## F-statistic:  59.2 on 1 and 313 DF,  p-value: 1.874e-13
model_6 <- lm(root_ob ~ `Percentage of African children`, data = modeldata)#significant
summary(model_6)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of African children`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.37075 -0.40101  0.00913  0.40737  1.21564 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.61152    0.03298 139.840  < 2e-16 ***
## `Percentage of African children`  4.38738    0.51796   8.471 9.66e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5184 on 313 degrees of freedom
## Multiple R-squared:  0.1865, Adjusted R-squared:  0.1839 
## F-statistic: 71.75 on 1 and 313 DF,  p-value: 9.661e-16
model_7 <- lm(root_ob ~ `IMD income average score`, data = modeldata)#significant
summary(model_7)
## 
## Call:
## lm(formula = root_ob ~ `IMD income average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83874 -0.20839  0.01143  0.19565  0.90327 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 3.53386    0.05152   68.60   <2e-16 ***
## `IMD income average score` 10.38165    0.41299   25.14   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3308 on 313 degrees of freedom
## Multiple R-squared:  0.6688, Adjusted R-squared:  0.6677 
## F-statistic: 631.9 on 1 and 313 DF,  p-value: < 2.2e-16
model_8 <- lm(root_ob ~ `IMD employment average score`, data = modeldata)#significant
summary(model_8)
## 
## Call:
## lm(formula = root_ob ~ `IMD employment average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.95236 -0.26248 -0.01322  0.21784  1.14273 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     3.63327    0.06408   56.70   <2e-16 ***
## `IMD employment average score` 12.00710    0.65060   18.46   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3978 on 313 degrees of freedom
## Multiple R-squared:  0.5211, Adjusted R-squared:  0.5196 
## F-statistic: 340.6 on 1 and 313 DF,  p-value: < 2.2e-16
model_9 <- lm(root_ob ~ `IMD education average score`, data = modeldata)#significant
summary(model_9)
## 
## Call:
## lm(formula = root_ob ~ `IMD education average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.92965 -0.30765 -0.06097  0.22807  1.38491 
## 
## Coefficients:
##                               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   3.866392   0.064383   60.05   <2e-16 ***
## `IMD education average score` 0.041999   0.002851   14.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4417 on 313 degrees of freedom
## Multiple R-squared:  0.4095, Adjusted R-squared:  0.4076 
## F-statistic:   217 on 1 and 313 DF,  p-value: < 2.2e-16
model_10 <- lm(root_ob ~ `IMD health average score`, data = modeldata)#significant
summary(model_10)
## 
## Call:
## lm(formula = root_ob ~ `IMD health average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.96863 -0.24078 -0.00531  0.19580  1.36286 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.82447    0.02311  208.74   <2e-16 ***
## `IMD health average score`  0.64498    0.03567   18.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.402 on 313 degrees of freedom
## Multiple R-squared:  0.5109, Adjusted R-squared:  0.5094 
## F-statistic:   327 on 1 and 313 DF,  p-value: < 2.2e-16
model_11 <- lm(root_ob ~ `IMD crime average score`, data = modeldata)#significant
summary(model_11)                                 
## 
## Call:
## lm(formula = root_ob ~ `IMD crime average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23527 -0.27003 -0.00924  0.28329  1.31520 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                4.83803    0.02583  187.33   <2e-16 ***
## `IMD crime average score`  0.71579    0.04895   14.62   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.443 on 313 degrees of freedom
## Multiple R-squared:  0.4059, Adjusted R-squared:  0.404 
## F-statistic: 213.8 on 1 and 313 DF,  p-value: < 2.2e-16
model_12 <- lm(root_ob ~ `IMD housing average score`, data = modeldata)#insignificant
summary(model_12)                       
## 
## Call:
## lm(formula = root_ob ~ `IMD housing average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.46163 -0.46095  0.01413  0.44805  1.42131 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.639841   0.118212  39.250   <2e-16 ***
## `IMD housing average score` 0.004669   0.005240   0.891    0.374    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5741 on 313 degrees of freedom
## Multiple R-squared:  0.002531,   Adjusted R-squared:  -0.0006562 
## F-statistic: 0.7941 on 1 and 313 DF,  p-value: 0.3735
model_13 <- lm(root_ob ~ `IMD living average score`, data = modeldata)#significant
summary(model_13) 
## 
## Call:
## lm(formula = root_ob ~ `IMD living average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.31999 -0.40801  0.01659  0.39484  1.30631 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                4.317863   0.078347  55.112  < 2e-16 ***
## `IMD living average score` 0.020840   0.003548   5.874 1.09e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5455 on 313 degrees of freedom
## Multiple R-squared:  0.09928,    Adjusted R-squared:  0.09641 
## F-statistic:  34.5 on 1 and 313 DF,  p-value: 1.088e-08
model_14 <- lm(root_ob ~ `IMD IDACI average score`, data = modeldata)#significant
summary(model_14)                           
## 
## Call:
## lm(formula = root_ob ~ `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76574 -0.21304 -0.00681  0.20497  0.96490 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3.52748    0.05158   68.38   <2e-16 ***
## `IMD IDACI average score`  7.91058    0.31361   25.22   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3301 on 313 degrees of freedom
## Multiple R-squared:  0.6703, Adjusted R-squared:  0.6692 
## F-statistic: 636.3 on 1 and 313 DF,  p-value: < 2.2e-16
model_15 <- lm(root_ob ~ `Fairly active rate`, data = modeldata)#insignificant
summary(model_15) 
## 
## Call:
## lm(formula = root_ob ~ `Fairly active rate`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4100 -0.4365 -0.0132  0.4096  1.4411 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            4.9832     0.2022  24.641   <2e-16 ***
## `Fairly active rate`  -1.0781     0.8308  -1.298    0.196    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.567 on 265 degrees of freedom
##   (48 observations deleted due to missingness)
## Multiple R-squared:  0.006315,   Adjusted R-squared:  0.002565 
## F-statistic: 1.684 on 1 and 265 DF,  p-value: 0.1955
model_16 <- lm(root_ob ~ `Less active rate`, data = modeldata)#significant
summary(model_16)
## 
## Call:
## lm(formula = root_ob ~ `Less active rate`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.40039 -0.38736 -0.04242  0.38638  1.43481 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.7469     0.1612   23.24  < 2e-16 ***
## `Less active rate`   3.3987     0.5473    6.21 2.03e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5325 on 266 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.1266, Adjusted R-squared:  0.1233 
## F-statistic: 38.56 on 1 and 266 DF,  p-value: 2.03e-09

Check correlation of model variables.

##Figure 2. Visualise the correlation matrix

#visualise the correlation matrix
ggcorrplot(corr, hc.order = TRUE, type = "lower",
           outline.col = "white",tl.cex = 11,
           colors = c("#6D9EC1", "white", "#E46726"))

Final model selection based on F test, adjusted R etc.

#Final model selection based on F test, adjusted R etc.
model_a <-lm(root_ob ~`Density of fast food outlets`+   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+`IMD living average score`+   `IMD IDACI average score`+`Less active rate`, data = modeldata)
summary(model_a)
## 
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` + 
##     `Percentage of Asian children` + `Percentage of African children` + 
##     `IMD education average score` + `IMD health average score` + 
##     `IMD crime average score` + `IMD living average score` + 
##     `IMD IDACI average score` + `Less active rate`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78546 -0.19218  0.00912  0.18340  0.74284 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       4.015791   0.190873  21.039  < 2e-16 ***
## `Density of fast food outlets`   -0.001369   0.001109  -1.235 0.217986    
## `Percentage of Mixed children`   -4.626314   1.398842  -3.307 0.001077 ** 
## `Percentage of Asian children`    1.362550   0.265761   5.127  5.8e-07 ***
## `Percentage of African children`  5.057605   0.589319   8.582  9.0e-16 ***
## `IMD education average score`     0.016403   0.004176   3.928 0.000110 ***
## `IMD health average score`        0.253100   0.072771   3.478 0.000593 ***
## `IMD crime average score`        -0.053869   0.057342  -0.939 0.348392    
## `IMD living average score`       -0.004143   0.002324  -1.783 0.075783 .  
## `IMD IDACI average score`         2.928125   0.897886   3.261 0.001260 ** 
## `Less active rate`                0.306062   0.304036   1.007 0.315043    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2655 on 257 degrees of freedom
##   (47 observations deleted due to missingness)
## Multiple R-squared:  0.7902, Adjusted R-squared:  0.782 
## F-statistic:  96.8 on 10 and 257 DF,  p-value: < 2.2e-16
#Remove less active rate
model_b <-lm(root_ob ~`Density of fast food outlets`+   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+`IMD living average score`+   `IMD IDACI average score`, data = modeldata)
summary(model_b)
## 
## Call:
## lm(formula = root_ob ~ `Density of fast food outlets` + `Percentage of Mixed children` + 
##     `Percentage of Asian children` + `Percentage of African children` + 
##     `IMD education average score` + `IMD health average score` + 
##     `IMD crime average score` + `IMD living average score` + 
##     `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73966 -0.18434 -0.01111  0.18002  1.23216 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.866e+00  1.467e-01  26.345  < 2e-16 ***
## `Density of fast food outlets`   -5.435e-05  9.435e-04  -0.058 0.954099    
## `Percentage of Mixed children`   -3.148e+00  1.348e+00  -2.336 0.020165 *  
## `Percentage of Asian children`    1.171e+00  2.258e-01   5.188 3.89e-07 ***
## `Percentage of African children`  4.485e+00  5.398e-01   8.309 3.21e-15 ***
## `IMD education average score`     1.997e-02  3.791e-03   5.268 2.61e-07 ***
## `IMD health average score`        1.730e-01  6.652e-02   2.601 0.009743 ** 
## `IMD crime average score`        -3.687e-02  5.348e-02  -0.689 0.491088    
## `IMD living average score`       -3.138e-03  2.183e-03  -1.438 0.151476    
## `IMD IDACI average score`         2.936e+00  8.213e-01   3.575 0.000406 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2779 on 305 degrees of freedom
## Multiple R-squared:  0.7722, Adjusted R-squared:  0.7655 
## F-statistic: 114.9 on 9 and 305 DF,  p-value: < 2.2e-16
#Remove fast food shop density
model_c <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+`IMD living average score`+   `IMD IDACI average score`, data = modeldata)
summary(model_c) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD crime average score` + 
##     `IMD living average score` + `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7391 -0.1841 -0.0108  0.1802  1.2335 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.862160   0.131781  29.307  < 2e-16 ***
## `Percentage of Mixed children`   -3.157068   1.335894  -2.363 0.018740 *  
## `Percentage of Asian children`    1.170565   0.225049   5.201 3.63e-07 ***
## `Percentage of African children`  4.488435   0.536011   8.374 2.03e-15 ***
## `IMD education average score`     0.020010   0.003727   5.368 1.57e-07 ***
## `IMD health average score`        0.171782   0.062841   2.734 0.006629 ** 
## `IMD crime average score`        -0.037292   0.052893  -0.705 0.481324    
## `IMD living average score`       -0.003164   0.002132  -1.484 0.138755    
## `IMD IDACI average score`         2.930233   0.813056   3.604 0.000366 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2774 on 306 degrees of freedom
## Multiple R-squared:  0.7722, Adjusted R-squared:  0.7663 
## F-statistic: 129.7 on 8 and 306 DF,  p-value: < 2.2e-16
#Remove living average score
model_d <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD crime average score`+  `IMD IDACI average score`, data = modeldata)
summary(model_d) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD crime average score` + 
##     `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71569 -0.18310 -0.00798  0.18331  1.16380 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.812813   0.127768  29.842  < 2e-16 ***
## `Percentage of Mixed children`   -3.215825   1.337921  -2.404 0.016827 *  
## `Percentage of Asian children`    1.103218   0.220859   4.995 9.89e-07 ***
## `Percentage of African children`  4.396905   0.533494   8.242 4.99e-15 ***
## `IMD education average score`     0.020879   0.003688   5.661 3.45e-08 ***
## `IMD health average score`        0.156840   0.062151   2.524 0.012122 *  
## `IMD crime average score`        -0.022629   0.052065  -0.435 0.664138    
## `IMD IDACI average score`         2.778762   0.808206   3.438 0.000667 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.278 on 307 degrees of freedom
## Multiple R-squared:  0.7706, Adjusted R-squared:  0.7653 
## F-statistic: 147.3 on 7 and 307 DF,  p-value: < 2.2e-16
#Remove crime average score instead of living average score
model_e <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score`+ `IMD living average score`+ `IMD IDACI average score`, data = modeldata)
summary(model_e) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD living average score` + 
##     `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.7369 -0.1846 -0.0076  0.1777  1.2215 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.904688   0.117068  33.354  < 2e-16 ***
## `Percentage of Mixed children`   -3.420394   1.281570  -2.669 0.008014 ** 
## `Percentage of Asian children`    1.136345   0.219573   5.175 4.12e-07 ***
## `Percentage of African children`  4.526474   0.532851   8.495 8.68e-16 ***
## `IMD education average score`     0.019997   0.003724   5.370 1.56e-07 ***
## `IMD health average score`        0.169717   0.062721   2.706 0.007193 ** 
## `IMD living average score`       -0.002884   0.002093  -1.378 0.169209    
## `IMD IDACI average score`         2.723392   0.757660   3.594 0.000379 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2772 on 307 degrees of freedom
## Multiple R-squared:  0.7718, Adjusted R-squared:  0.7666 
## F-statistic: 148.4 on 7 and 307 DF,  p-value: < 2.2e-16
#Remove crime average score and living average score
model_f <-lm(root_ob ~  `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`, data = modeldata)
summary(model_f)
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71559 -0.18097 -0.01082  0.18078  1.16015 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.842304   0.108118  35.538  < 2e-16 ***
## `Percentage of Mixed children`   -3.378109   1.283071  -2.633 0.008895 ** 
## `Percentage of Asian children`    1.085460   0.216761   5.008 9.29e-07 ***
## `Percentage of African children`  4.425927   0.528602   8.373 2.00e-15 ***
## `IMD education average score`     0.020823   0.003681   5.657 3.52e-08 ***
## `IMD health average score`        0.156375   0.062060   2.520 0.012249 *  
## `IMD IDACI average score`         2.657163   0.757236   3.509 0.000517 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared:  0.7704, Adjusted R-squared:  0.766 
## F-statistic: 172.3 on 6 and 308 DF,  p-value: < 2.2e-16

Stepwise AIC and best subset variable selection (reference only).

# stepwise aic regression
#Run for all variables
ols_step_both_aic(model_a)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Less active rate`                  addition    155.185    27.175    59.176    0.68529      0.68292 
## `Percentage of African children`    addition    117.903    23.470    62.881    0.72820      0.72511 
## `IMD education average score`       addition     96.236    21.486    64.865    0.75117      0.74739 
## `IMD health average score`          addition     83.766    20.357    65.994    0.76425      0.75975 
## `Percentage of Asian children`      addition     74.845    19.544    66.807    0.77367      0.76846 
## `Percentage of Mixed children`      addition     63.230    18.576    67.775    0.78488      0.77908 
## `IMD living average score`          addition     61.559    18.323    68.028    0.78780      0.78125 
## `Density of fast food outlets`      addition     61.427    18.178    68.173    0.78948      0.78214 
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_a)
ols_step_both_aic(model_a, details = FALSE)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Less active rate`                  addition    155.185    27.175    59.176    0.68529      0.68292 
## `Percentage of African children`    addition    117.903    23.470    62.881    0.72820      0.72511 
## `IMD education average score`       addition     96.236    21.486    64.865    0.75117      0.74739 
## `IMD health average score`          addition     83.766    20.357    65.994    0.76425      0.75975 
## `Percentage of Asian children`      addition     74.845    19.544    66.807    0.77367      0.76846 
## `Percentage of Mixed children`      addition     63.230    18.576    67.775    0.78488      0.77908 
## `IMD living average score`          addition     61.559    18.323    68.028    0.78780      0.78125 
## `Density of fast food outlets`      addition     61.427    18.178    68.173    0.78948      0.78214 
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_a)
##                                                                                                                                         Best Subsets Regression                                                                                                                                        
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Model Index    Predictors
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##      1         `IMD IDACI average score`                                                                                                                                                                                                                                                                
##      2         `Percentage of African children` `IMD IDACI average score`                                                                                                                                                                                                                               
##      3         `Percentage of African children` `IMD education average score` `IMD IDACI average score`                                                                                                                                                                                                 
##      4         `Percentage of African children` `IMD education average score` `IMD health average score` `Less active rate`                                                                                                                                                                             
##      5         `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `Less active rate`                                                                                                                                              
##      6         `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD IDACI average score` `Less active rate`                                                                                                                
##      7         `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD IDACI average score` `Less active rate`                                                                                     
##      8         `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD living average score` `IMD IDACI average score` `Less active rate`                                                          
##      9         `Density of fast food outlets` `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD living average score` `IMD IDACI average score` `Less active rate`                           
##     10         `Density of fast food outlets` `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD crime average score` `IMD living average score` `IMD IDACI average score` `Less active rate` 
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## 
##                                                    Subsets Regression Summary                                                    
## ---------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                           
## Model    R-Square    R-Square    R-Square      C(p)        AIC         SBIC         SBC        MSEP      FPE       HSP      APC  
## ---------------------------------------------------------------------------------------------------------------------------------
##   1        0.6703      0.6692      0.6658    172.7044    199.5678    -696.0019    210.8255    0.1096    0.1096    3e-04    0.3339 
##   2        0.7096      0.7078       0.703    116.9879    161.5430    -734.0873    176.5532    0.0972    0.0972    3e-04    0.2960 
##   3        0.7439      0.7414      0.7359     68.6655    123.9438    -771.3316    142.7066    0.0862    0.0862    3e-04    0.2627 
##   4        0.7588      0.7552      0.7467     37.4353     87.8608    -673.6362    109.4067    0.0807    0.0807    3e-04    0.2503 
##   5        0.7694      0.7650      0.7565     26.5443     77.9041    -683.2700    103.0410    0.0778    0.0777    3e-04    0.2412 
##   6        0.7780      0.7729       0.763     17.9586     69.6703    -691.0849     98.3982    0.0754    0.0754    3e-04    0.2339 
##   7        0.7849      0.7791      0.7677     11.5270     63.2300    -697.0501     95.5489    0.0737    0.0736    3e-04    0.2284 
##   8        0.7878      0.7812      0.7696      9.9416     61.5587    -698.4363     97.4685    0.0732    0.0731    3e-04    0.2269 
##   9        0.7895      0.7821      0.7694      9.8825     61.4273    -698.3421    100.9282    0.0732    0.0731    3e-04    0.2268 
##  10        0.7902      0.7820      0.7682     11.0000     62.5086    -697.1045    105.6004    0.0735    0.0734    3e-04    0.2278 
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria
#Run for model f
ols_step_both_aic(model_f)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Percentage of African children`    addition    161.543    30.028    73.380    0.70962      0.70776 
## `IMD education average score`       addition    123.944    26.481    76.928    0.74392      0.74145 
## `Percentage of Asian children`      addition    109.707    25.150    78.258    0.75679      0.75365 
## `Percentage of Mixed children`      addition     99.939    24.228    79.180    0.76570      0.76191 
## `IMD health average score`          addition     95.512    23.739    79.670    0.77044      0.76596 
## ---------------------------------------------------------------------------------------------------
k <- ols_step_both_aic(model_f)
ols_step_both_aic(model_f, details = FALSE)
## 
## 
##                                           Stepwise Summary                                           
## ---------------------------------------------------------------------------------------------------
## Variable                             Method       AIC       RSS      Sum Sq     R-Sq      Adj. R-Sq 
## ---------------------------------------------------------------------------------------------------
## `IMD IDACI average score`           addition    199.568    34.097    69.312    0.67027      0.66922 
## `Percentage of African children`    addition    161.543    30.028    73.380    0.70962      0.70776 
## `IMD education average score`       addition    123.944    26.481    76.928    0.74392      0.74145 
## `Percentage of Asian children`      addition    109.707    25.150    78.258    0.75679      0.75365 
## `Percentage of Mixed children`      addition     99.939    24.228    79.180    0.76570      0.76191 
## `IMD health average score`          addition     95.512    23.739    79.670    0.77044      0.76596 
## ---------------------------------------------------------------------------------------------------
ols_step_best_subset(model_f)
##                                                                                     Best Subsets Regression                                                                                     
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Model Index    Predictors
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
##      1         `IMD IDACI average score`                                                                                                                                                         
##      2         `Percentage of African children` `IMD IDACI average score`                                                                                                                        
##      3         `Percentage of African children` `IMD education average score` `IMD IDACI average score`                                                                                          
##      4         `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score`                                                          
##      5         `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD IDACI average score`                            
##      6         `Percentage of Mixed children` `Percentage of Asian children` `Percentage of African children` `IMD education average score` `IMD health average score` `IMD IDACI average score` 
## ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## 
##                                                    Subsets Regression Summary                                                    
## ---------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                           
## Model    R-Square    R-Square    R-Square      C(p)        AIC         SBIC         SBC        MSEP      FPE       HSP      APC  
## ---------------------------------------------------------------------------------------------------------------------------------
##   1        0.6703      0.6692      0.6658    131.3857    199.5678    -695.6811    210.8255    0.1096    0.1096    3e-04    0.3339 
##   2        0.7096      0.7078       0.703     80.5994    161.5430    -733.6105    176.5532    0.0972    0.0972    3e-04    0.2960 
##   3        0.7439      0.7414      0.7359     36.5756    123.9438    -770.6667    142.7066    0.0862    0.0862    3e-04    0.2627 
##   4        0.7587      0.7556        0.75     18.7370    107.2089    -786.9937    129.7243    0.0818    0.0818    3e-04    0.2491 
##   5        0.7657      0.7619      0.7542     11.3492     99.9394    -793.9671    126.2074    0.0799    0.0799    3e-04    0.2434 
##   6        0.7704      0.7660      0.7569      7.0000     95.5119    -798.1022    125.5325    0.0788    0.0788    3e-04    0.2400 
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria
#save the residuals into dataframe
finalmodel<- lm(root_ob ~   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`, data = modeldata)
summary(finalmodel) 
## 
## Call:
## lm(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD IDACI average score`, data = modeldata)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71559 -0.18097 -0.01082  0.18078  1.16015 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       3.842304   0.108118  35.538  < 2e-16 ***
## `Percentage of Mixed children`   -3.378109   1.283071  -2.633 0.008895 ** 
## `Percentage of Asian children`    1.085460   0.216761   5.008 9.29e-07 ***
## `Percentage of African children`  4.425927   0.528602   8.373 2.00e-15 ***
## `IMD education average score`     0.020823   0.003681   5.657 3.52e-08 ***
## `IMD health average score`        0.156375   0.062060   2.520 0.012249 *  
## `IMD IDACI average score`         2.657163   0.757236   3.509 0.000517 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared:  0.7704, Adjusted R-squared:  0.766 
## F-statistic: 172.3 on 6 and 308 DF,  p-value: < 2.2e-16
#Check multicollinearity
vif(finalmodel)
##   `Percentage of Mixed children`   `Percentage of Asian children` 
##                         4.220859                         1.598609 
## `Percentage of African children`    `IMD education average score` 
##                         3.631903                         4.220017 
##       `IMD health average score`        `IMD IDACI average score` 
##                         6.346407                         8.240346
ObesityMAP$`Model residuals`<- finalmodel$residuals

Figure 3. Diagnostic plots for OLS regression model

#residual plot
par(mfrow = c(2, 2))  # Split the plotting panel into a 2 x 2 grid
plot(finalmodel)  # Plot the model residuals

Cross-validation of model

###Cross-validation of model
#####Cross-Validation 
set.seed(48)
regressdata<-modeldata[,c("Percentage of Mixed children","Percentage of Asian children",
                          "Percentage of African children","IMD education average score",
                          "IMD health average score","IMD IDACI average score",
                       "root_ob" )]
model<- train(root_ob ~ `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`,
               regressdata,
              method="lm", 
              trControl=trainControl(method="repeatedcv",
                                     number=10,
                                     verboseIter=TRUE))
## + Fold01.Rep1: intercept=TRUE 
## - Fold01.Rep1: intercept=TRUE 
## + Fold02.Rep1: intercept=TRUE 
## - Fold02.Rep1: intercept=TRUE 
## + Fold03.Rep1: intercept=TRUE 
## - Fold03.Rep1: intercept=TRUE 
## + Fold04.Rep1: intercept=TRUE 
## - Fold04.Rep1: intercept=TRUE 
## + Fold05.Rep1: intercept=TRUE 
## - Fold05.Rep1: intercept=TRUE 
## + Fold06.Rep1: intercept=TRUE 
## - Fold06.Rep1: intercept=TRUE 
## + Fold07.Rep1: intercept=TRUE 
## - Fold07.Rep1: intercept=TRUE 
## + Fold08.Rep1: intercept=TRUE 
## - Fold08.Rep1: intercept=TRUE 
## + Fold09.Rep1: intercept=TRUE 
## - Fold09.Rep1: intercept=TRUE 
## + Fold10.Rep1: intercept=TRUE 
## - Fold10.Rep1: intercept=TRUE 
## Aggregating results
## Fitting final model on full training set
summary(model)
## 
## Call:
## lm(formula = .outcome ~ ., data = dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.71559 -0.18097 -0.01082  0.18078  1.16015 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             3.842304   0.108118  35.538  < 2e-16
## `\\`Percentage of Mixed children\\``   -3.378109   1.283071  -2.633 0.008895
## `\\`Percentage of Asian children\\``    1.085460   0.216761   5.008 9.29e-07
## `\\`Percentage of African children\\``  4.425927   0.528602   8.373 2.00e-15
## `\\`IMD education average score\\``     0.020823   0.003681   5.657 3.52e-08
## `\\`IMD health average score\\``        0.156375   0.062060   2.520 0.012249
## `\\`IMD IDACI average score\\``         2.657163   0.757236   3.509 0.000517
##                                           
## (Intercept)                            ***
## `\\`Percentage of Mixed children\\``   ** 
## `\\`Percentage of Asian children\\``   ***
## `\\`Percentage of African children\\`` ***
## `\\`IMD education average score\\``    ***
## `\\`IMD health average score\\``       *  
## `\\`IMD IDACI average score\\``        ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2776 on 308 degrees of freedom
## Multiple R-squared:  0.7704, Adjusted R-squared:  0.766 
## F-statistic: 172.3 on 6 and 308 DF,  p-value: < 2.2e-16

#print R2, MAE and RMSE

#print R2, MAE and RMSE
print(model)
## Linear Regression 
## 
## 315 samples
##   6 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 1 times) 
## Summary of sample sizes: 283, 284, 283, 284, 283, 283, ... 
## Resampling results:
## 
##   RMSE       Rsquared   MAE      
##   0.2795769  0.7716322  0.2217382
## 
## Tuning parameter 'intercept' was held constant at a value of TRUE

#Now check for spatial auto-correlation

ObesityMAP <- as(ObesityMAP,"Spatial")
names(ObesityMAP)
##  [1] "GSS_CODE"                                        
##  [2] "FID"                                             
##  [3] "LAD19NM"                                         
##  [4] "LAD19NMW"                                        
##  [5] "BNG_E"                                           
##  [6] "BNG_N"                                           
##  [7] "LONG"                                            
##  [8] "LAT"                                             
##  [9] "Shape__Area"                                     
## [10] "Shape__Length"                                   
## [11] "obesity_rate"                                    
## [12] "Density.of.fast.food.outlets"                    
## [13] "Active.rate"                                     
## [14] "Percentage.of.White.children"                    
## [15] "Percentage.of.Mixed.children"                    
## [16] "Percentage.of.Asian.children"                    
## [17] "Percentage.of.African.children"                  
## [18] "Income...Average.rank"                           
## [19] "Employment...Average.rank"                       
## [20] "Education..Skills.and.Training...Average.rank"   
## [21] "Health.Deprivation.and.Disability...Average.rank"
## [22] "Crime...Average.rank"                            
## [23] "Barriers.to.Housing.and.Services...Average.rank" 
## [24] "Living.Environment...Average.rank"               
## [25] "IDACI...Average.rank"                            
## [26] "IMD.income.average.score"                        
## [27] "IMD.employment.average.score"                    
## [28] "IMD.education.average.score"                     
## [29] "IMD.health.average.score"                        
## [30] "IMD.crime.average.score"                         
## [31] "IMD.housing.average.score"                       
## [32] "IMD.living.average.score"                        
## [33] "IMD.IDACI.average.score"                         
## [34] "Fairly.active.rate"                              
## [35] "Less.active.rate"                                
## [36] "root_ob"                                         
## [37] "Model.residuals"
#Calculate centriod of each LA.
coordsW <- coordinates(ObesityMAP)
plot(coordsW)

#Neighbours list of queens contiguity
LWard_nb <- poly2nb(ObesityMAP, queen=T)

#and nearest neighbours
knn_wards <- knearneigh(coordsW, k=4)
LWard_knn <- knn2nb(knn_wards)

#plot and add a map
plot(LWard_nb, coordinates(coordsW), col="red")

plot(LWard_knn, coordinates(coordsW), col="blue")

plot(ObesityMAP)

#create a spatial weights matrix 
Lward.queens_weight <- nb2listw(LWard_nb, style="C",zero.policy=TRUE)
Lward.knn_4_weight <- nb2listw(LWard_knn, style="C",zero.policy=TRUE)

#Run Moran’s I using the residuals from our final model #first using queens neighbours

moran.test(ObesityMAP@data$`Model.residuals`, Lward.queens_weight,zero.policy=TRUE)
## 
##  Moran I test under randomisation
## 
## data:  ObesityMAP@data$Model.residuals  
## weights: Lward.queens_weight  n reduced by no-neighbour observations
##   
## 
## Moran I statistic standard deviate = 7.9544, p-value = 9e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.271642496      -0.003194888       0.001193815

#Then knn = 4

moran.test(ObesityMAP@data$`Model.residuals`, Lward.knn_4_weight,zero.policy=TRUE)
## 
##  Moran I test under randomisation
## 
## data:  ObesityMAP@data$Model.residuals  
## weights: Lward.knn_4_weight    
## 
## Moran I statistic standard deviate = 7.8037, p-value = 3.005e-15
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.285285412      -0.003184713       0.001366461

##GWR(Andy MacLachlan and Adam Dennett,2019)

#calculate kernel bandwidth
GWRbandwidth <- gwr.sel(root_ob ~   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`,data = modeldata,coords=coordsW,adapt=T)
## Adaptive q: 0.381966 CV score: 22.7907 
## Adaptive q: 0.618034 CV score: 23.71091 
## Adaptive q: 0.236068 CV score: 22.32975 
## Adaptive q: 0.145898 CV score: 22.26535 
## Adaptive q: 0.1565004 CV score: 22.27587 
## Adaptive q: 0.09016994 CV score: 21.82901 
## Adaptive q: 0.05572809 CV score: 21.04139 
## Adaptive q: 0.03444185 CV score: 20.49357 
## Adaptive q: 0.02128624 CV score: 22.45724 
## Adaptive q: 0.04257247 CV score: 20.67178 
## Adaptive q: 0.02941686 CV score: 20.76158 
## Adaptive q: 0.03659131 CV score: 20.55597 
## Adaptive q: 0.03252248 CV score: 20.55079 
## Adaptive q: 0.03451283 CV score: 20.49199 
## Adaptive q: 0.03530674 CV score: 20.49814 
## Adaptive q: 0.03479792 CV score: 20.48601 
## Adaptive q: 0.03499227 CV score: 20.4862 
## Adaptive q: 0.03488435 CV score: 20.48431 
## Adaptive q: 0.03492504 CV score: 20.48376 
## Adaptive q: 0.03492504 CV score: 20.48376
#run the gwr model
gwr.model = gwr(root_ob ~   `Percentage of Mixed children`+ `Percentage of Asian children`+ `Percentage of African children`+   `IMD education average score`+  `IMD health average score` +    `IMD IDACI average score`,data = modeldata,coords=coordsW, adapt=GWRbandwidth, hatmatrix=TRUE, se.fit=TRUE)

#print the results of the model
gwr.model
## Call:
## gwr(formula = root_ob ~ `Percentage of Mixed children` + `Percentage of Asian children` + 
##     `Percentage of African children` + `IMD education average score` + 
##     `IMD health average score` + `IMD IDACI average score`, data = modeldata, 
##     coords = coordsW, adapt = GWRbandwidth, hatmatrix = TRUE, 
##     se.fit = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.03492504 (about 11 of 315 data points)
## Summary of GWR coefficient estimates at data points:
##                                          Min.     1st Qu.      Median
## X.Intercept.                        2.2379325   2.9897034   3.4863385
## X.Percentage.of.Mixed.children.   -22.5861202  -3.5869974  -0.8864863
## X.Percentage.of.Asian.children.    -2.5524665   0.2057791   1.1725359
## X.Percentage.of.African.children.  -8.5687184   2.1280087   3.3279073
## X.IMD.education.average.score.     -0.0072453   0.0185664   0.0262770
## X.IMD.health.average.score.        -0.6729346  -0.1458490  -0.0352223
## X.IMD.IDACI.average.score.         -1.1677115   2.9253575   4.3309386
##                                       3rd Qu.        Max.  Global
## X.Intercept.                        3.9000246   4.5576890  3.8423
## X.Percentage.of.Mixed.children.     1.4186154  14.1164053 -3.3781
## X.Percentage.of.Asian.children.     1.5801901   2.7330463  1.0855
## X.Percentage.of.African.children.   4.1428690  10.3270702  4.4259
## X.IMD.education.average.score.      0.0332300   0.0485405  0.0208
## X.IMD.health.average.score.         0.0863856   0.3934210  0.1564
## X.IMD.IDACI.average.score.          5.3997008   9.6079104  2.6572
## Number of data points: 315 
## Effective number of parameters (residual: 2traceS - traceS'S): 92.88358 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 222.1164 
## Sigma (residual: 2traceS - traceS'S): 0.2308206 
## Effective number of parameters (model: traceS): 68.62676 
## Effective degrees of freedom (model: traceS): 246.3732 
## Sigma (model: traceS): 0.2191634 
## Sigma (ML): 0.1938249 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 39.72662 
## AIC (GWR p. 96, eq. 4.22): -71.14606 
## Residual sum of squares: 11.83395 
## Quasi-global R2: 0.8855612

#Attach coefficients to original dataframe

results<-as.data.frame(gwr.model$SDF)
names(results)
##  [1] "sum.w"                                   
##  [2] "X.Intercept."                            
##  [3] "X.Percentage.of.Mixed.children."         
##  [4] "X.Percentage.of.Asian.children."         
##  [5] "X.Percentage.of.African.children."       
##  [6] "X.IMD.education.average.score."          
##  [7] "X.IMD.health.average.score."             
##  [8] "X.IMD.IDACI.average.score."              
##  [9] "X.Intercept._se"                         
## [10] "X.Percentage.of.Mixed.children._se"      
## [11] "X.Percentage.of.Asian.children._se"      
## [12] "X.Percentage.of.African.children._se"    
## [13] "X.IMD.education.average.score._se"       
## [14] "X.IMD.health.average.score._se"          
## [15] "X.IMD.IDACI.average.score._se"           
## [16] "gwr.e"                                   
## [17] "pred"                                    
## [18] "pred.se"                                 
## [19] "localR2"                                 
## [20] "X.Intercept._se_EDF"                     
## [21] "X.Percentage.of.Mixed.children._se_EDF"  
## [22] "X.Percentage.of.Asian.children._se_EDF"  
## [23] "X.Percentage.of.African.children._se_EDF"
## [24] "X.IMD.education.average.score._se_EDF"   
## [25] "X.IMD.health.average.score._se_EDF"      
## [26] "X.IMD.IDACI.average.score._se_EDF"       
## [27] "pred.se.1"                               
## [28] "coord.x"                                 
## [29] "coord.y"
ObesityMAP@data$coefr2<-results$localR2
ObesityMAP@data$coefmixed<-results$X.Percentage.of.Mixed.children.
ObesityMAP@data$coefafrican<-results$X.Percentage.of.African.children.
ObesityMAP@data$coefedu<-results$X.IMD.education.average.score.
ObesityMAP@data$coefhealth<-results$X.IMD.health.average.score.
ObesityMAP@data$coefasian<-results$X.Percentage.of.Asian.children.
ObesityMAP@data$coefidaci<-results$X.IMD.IDACI.average.score.

Maps

#Local R-squared

tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
  tm_polygons(col = "coefr2", title = "Local R-squared", breaks = c(-Inf,0.7,0.75,0.8,0.85,0.9,0.95, Inf), palette = "YlGnBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(ObesityMAP) +
  tm_polygons(col = "coefafrican", title = "Coefficient for percentage of African children", palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefafrican" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#Percentage of mixed children

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefmixed", title = "Coefficient for percentage of Mixed children",breaks = c(-Inf,-20,-10,-5,0,5,10,15, Inf),  palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefmixed" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#Percentage of Asian children

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefasian", title = "Coefficient for percentage of Asian children",  palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefasian" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#IDACI score

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefidaci", title = "Coefficient for IMD IDACI average acore", breaks = c(-Inf,-2,0,2,4,6,8, Inf), palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefidaci" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#IMD Health score

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefhealth", title = "Coefficient for IMD Health average acore",  palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefhealth" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

#IMD education

tm_shape(ObesityMAP) +
  tm_polygons(col = "coefedu", title = "Coefficient for IMD Education average acore", palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.
## Variable "coefedu" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

SECTION 3: Cluster analysis

#render("Report code.Rmd", output_dir = output_dir, params = list(output_dir = output_dir))
modeldata2<- childdata2[,c("GSS_CODE","root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]

Cluster analysis(Guy Lansley and James Cheshire,2018)

clusterdata<- childdata2[,c("root_ob","Percentage of Mixed children","Percentage of Asian children","Percentage of African children",
"IMD education average score","IMD health average score","IMD IDACI average score")]
value <- colnames(clusterdata)
# creates a new data frame
stand_data <- clusterdata
for(i in 1: ncol (clusterdata)){
stand_data[, value[i]] <- scale(as.numeric(modeldata2[, value[i]]))
}

#K-means clustering (k=5)

Km <- kmeans(stand_data, 5, nstart = 25, iter.max = 1000)
KmClusters <- as.matrix(Km$cluster)
KmClusters <- as.data.frame(KmClusters)
KmCenters <- as.matrix(Km$centers)
KmCenters <- as.data.frame(KmCenters)
#Number of LAs in each cluster
table(KmClusters)
## KmClusters
##   1   2   3   4   5 
##  80 109  80  21  25

#Validate choice of k

# Total within-cluster sum of square
wss <- NULL
for (i in 1:15) wss[i] <- kmeans(stand_data,centers = i,iter.max = 1000)$tot.withinss
plot(1:15, wss, type = "b", pch = 19, xlab = "Number of Clusters",
ylab = "Total within-cluster sum of squares")

#Cluster plot of the first 2 principal components

library(cluster)
clusplot(stand_data,Km$cluster, color = TRUE, shade = FALSE,
labels = 4, lines = 0, plotchar = FALSE)

#More cluster plot
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
fviz_cluster(Km, data = stand_data, geom = "point", ellipse = F, pointsize = 0.5,
ggtheme = theme_classic())

#Radial plot for each group

library(plotrix)
KmCenters[5,]<- c(0)
par(cex.axis = 0.8, cex.lab = 0.8)
radial.plot(KmCenters[c(1,5),], labels = colnames(KmCenters),
boxed.radial = FALSE, show.radial.grid = TRUE,
line.col = c("blue", "red"), radlab = TRUE,
rp.type = "p", show.grid.labels = 3)

##Creates a map of cluster groups

#Join the cluster labels to the GSS codes
GSScode<- childdata2[,c(1,2)]
Classification <- as.data.frame(cbind(as.character(GSScode[,1]), KmClusters[,1]))
names(Classification) <- c("GSS_CODE", "Classification")

OA.Class<- merge(ObesityMAP, Classification, by.x = "GSS_CODE", by.y = "GSS_CODE",duplicateGeoms = TRUE)

# creates a map in R
tm_shape(OA.Class) +
  tm_polygons(col = "Classification", title = "Cluster groups", palette = "RdBu", alpha = 1)+
  tm_format_World(title = NA,legend.width = 0.8, scale = 1,
                  legend.position = c("left", "top"))+tm_scale_bar()+tm_compass()
## Compass not supported in view mode.
## legend.postion is used for plot mode. Use view.legend.position in tm_view to set the legend position in view mode.

Reference: Andy MacLachlan and Adam Dennett, 2019. CASA0005 Geographic Information Systems and Science. Available from: https://andrewmaclachlan.github.io/CASA0005repo/index.html

Guy Lansley and James Cheshire,2018.Creating a Geodemographic Classification Using K-means Clustering in R.Available from: https://data.cdrc.ac.uk/tutorial/creating-a-geodemographic-classification-using-k-means-clustering-in-r